# Load in the taxonomy and shared data
otu_tax <- "mothur/stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.0.03.cons.taxonomy"
otu_shared <- "mothur/stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.shared"
otu_dat <- import_mothur(mothur_shared_file = otu_shared, mothur_constaxonomy_file = otu_tax)
# Fix the taxonomy names
colnames(tax_table(otu_data)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
## Error in colnames(tax_table(otu_data)) <- c("Kingdom", "Phylum", "Class", : object 'otu_data' not found
################################################
########## ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(otu_data))
## Error in tax_table(otu_data): object 'otu_data' not found
Phylum <- as.character(phy$Phylum)
## Error in eval(expr, envir, enclos): object 'phy' not found
Class <- as.character(phy$Class)
## Error in eval(expr, envir, enclos): object 'phy' not found
for (i in 1:length(Phylum)){
if (Phylum[i] == "Proteobacteria"){
Phylum[i] <- Class[i]
}
}
## Error in eval(expr, envir, enclos): object 'Phylum' not found
phy$Phylum <- Phylum
## Error in eval(expr, envir, enclos): object 'Phylum' not found
t <- tax_table(as.matrix(phy))
## Error in as.matrix(phy): object 'phy' not found
tax_table(otu_data) <- t
## Error in tax_table(otu_data) <- t: object 'otu_data' not found
################################################
# Sample Names
samp_names <- colnames(otu_table(otu_data))
## Error in otu_table(otu_data): object 'otu_data' not found
# Create metadata info
df <- data.frame(matrix(NA, ncol = 1, nrow = length(samp_names)))
## Error in matrix(NA, ncol = 1, nrow = length(samp_names)): object 'samp_names' not found
colnames(df) <- c("names")
## Error in `colnames<-`(`*tmp*`, value = "names"): attempt to set 'colnames' on an object with less than two dimensions
df$names <- samp_names
## Error in eval(expr, envir, enclos): object 'samp_names' not found
# Create metadata info
meta_df <- make_muskegon_metadata(df)
# Create a norep_water_name column
meta_df$norep_water_name <- paste(substr(meta_df$names,1,4),substr(meta_df$names,7,9), sep = "")
# Load in the extra metadata for the Muskegon Lake Project
musk_data <- read.csv("metadata/processed_muskegon_metadata.csv", header = TRUE) # Load in the extra metada
musk_data_subset <- select(musk_data, -c(lakesite, limnion, month, year, project, season))
complete_meta_df <- left_join(meta_df, musk_data_subset, by = "norep_water_name")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
row.names(complete_meta_df) <- complete_meta_df$names
complete_meta_df$water_name <- paste(substr(complete_meta_df$names,1,5),substr(complete_meta_df$names,7,9), sep = "")
complete_meta_df$norep_filter_name <- paste(substr(complete_meta_df$names,1,4),substr(complete_meta_df$names,6,9), sep = "")
## Read in the Oligotyping Count file
oligo_count <- read.table("oligotyping/MATRIX-COUNT.txt", header = TRUE, row.names = 1) # Read in the oligotyping file making the first row and columns the column and row names
colnames(oligo_count) <- gsub("X", "", colnames(oligo_count)) # Remove the X that R automatically puts in front of the node numbers in the column names
oligo_count <- oligo_count[ , order(names(oligo_count))] # Order the node numbers to match with taxonomy.
## Read in the Oligotyping Taxonomy File
raw_oligo_tax <- read.table("oligotyping/oligo.taxonomy") # Read in the oligotyping file making the first row and columns the column and row names
cols <- data.frame(str_split_fixed(raw_oligo_tax[,1], "size_", 2)) # Requires stringr package!
colnames(cols) <- c("Oligotype", "Size")
#cols$Size <- as.numeric(cols$Size)
cols$Oligotype <- gsub("\\|.*","",cols$Oligotype)
oligo_tax <- cbind(cols, raw_oligo_tax$V2)
colnames(oligo_tax)[3] <- "Taxonomy"
oligo_tax <- arrange(oligo_tax, Oligotype) # Order the node numbers to match with oligotype count table
oligo_tax$Size = NULL
row.names(oligo_tax) <- oligo_tax$Oligotype
oligo_tax$Oligotype = NULL
# Separate the taxonomy
tax <- data.frame(str_split_fixed(oligo_tax$Taxonomy, ";", 7)) # Requires stringr package!
tax <- apply(tax, 2, function(y) gsub("\\s*\\([^\\)]+\\)","",as.character(y))) # remove parentheses with everything inside it
colnames(tax) <- c("Kingdom","Phylum","Class","Order","Family","Genus", "Species")
tax <- data.frame(tax)
tax$Species <- gsub(";","",tax$Species) # Remove the final ; in the species column
row.names(tax) <- row.names(oligo_tax)
### Create a phyloseq object
oligoCOUNT <- otu_table(oligo_count, taxa_are_rows = FALSE)
oligoTAX <- tax_table(as.matrix(tax))
oligo_data <- phyloseq(oligoCOUNT, oligoTAX)
# Add the metadata to our phyloseq object!
## Add metadata to OTUs
sample_data(otu_data) <- complete_meta_df
otu_data <- prune_taxa(taxa_sums(otu_data) > 0, otu_data)
## Add metadata to Oligotypes
sample_data(oligo_data) <- complete_meta_df
oligo_data <- prune_taxa(taxa_sums(oligo_data) > 0, oligo_data)
# Merged metadata
merged_complete_meta_df<-
select(complete_meta_df, -c(names, replicate, nuc_acid_type, water_name)) %>%
distinct() %>%
arrange(norep_filter_name)
## Add Production data from GVSU AWRI provided by the lab of Bopi Biddanda
bopi_data <- read.csv("metadata/production_data.csv") %>%
dplyr::rename(norep_filter_name = names) %>% # rename "names" column to "norep_filter_name"
select(-c(X, limnion, fraction, month, year, season))
## Merge two metadata files together!
df1 <- full_join(merged_complete_meta_df, bopi_data) %>%
select(-c(Depth, month))
## Joining, by = "norep_filter_name"
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
# provide row.names to match sample
row.names(df1) <- df1$norep_filter_name
# merge samples for OTUs
otu_merged <- merge_samples(otu_data, group = "norep_filter_name", fun = "sum")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
sample_names(otu_merged) == row.names(merged_complete_meta_df) # Sanity check
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [188] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [210] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [243] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [254] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [276] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [287] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [298] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [320] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [342] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [353] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [364] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [375] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [386] FALSE FALSE
# merge samples for Oligotypes
oligo_merged <- merge_samples(oligo_data, group = "norep_filter_name", fun = "sum")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
# Add nice sample information
sample_data(otu_merged) <- df1
sample_data(oligo_merged) <- df1
# Subset Mukegon Lake samples out of the OTU phyloseq object
otu_merged_musk <- subset_samples(physeq = otu_merged, project == "Muskegon_Lake")
# Subset Mukegon Lake samples out of the Oligotype phyloseq object
oligo_merged_musk <- subset_samples(physeq = oligo_merged, project == "Muskegon_Lake")
# Check the sequencing depth of each sample
sums_otu <- data.frame(rowSums(otu_table(otu_merged_musk)))
colnames(sums_otu) <- "Sample_TotalSeqs"
sums_otu$sample <- row.names(sums_otu)
sums_otu <- arrange(sums_otu, Sample_TotalSeqs) %>%
filter(Sample_TotalSeqs < 10000)
## OTU: SUBSAMPLE AT 6600
#### Create a plot of the number of sequences per sample
ggplot(sums_otu, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) +
ylab("Number of Sequences per Sample") +
geom_bar(stat = "identity", colour="black",fill="cornflowerblue") + xlab("Sample Name") +
ggtitle("OTU: Samples less than 10,000 reads") +
theme(axis.text.x = element_text(colour = "black", size=16, angle=45, hjust = 1, vjust = 1))
# Check the sequencing depth of each sample
sums_oligo <- data.frame(rowSums(otu_table(oligo_merged_musk)))
colnames(sums_oligo) <- "Sample_TotalSeqs"
sums_oligo$sample <- row.names(sums_oligo)
sums_oligo <- arrange(sums_oligo, Sample_TotalSeqs) %>%
filter(Sample_TotalSeqs < 5000)
sums_oligo$names <- sums_oligo$sample
o <- make_metadata_norep(sums_oligo)
o1 <- filter(o, fraction == "WholePart" & limnion == "Top")
# OLIGOTYPING: SUBSAMPLE AT 4300
#### Create a plot of the number of sequences per sample
ggplot(sums_oligo, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) +
ylab("Number of Sequences per Sample") +
geom_bar(stat = "identity", colour="black",fill="cornflowerblue") + xlab("Sample Name") +
ggtitle("Oligotyping") +
theme(axis.text.x = element_text(colour = "black", size=16, angle=45, hjust = 1, vjust = 1))
# Histogram of sample read counts for the read counts of each sample
ggplot(data.frame(sum = sample_sums(oligo_merged_musk)), aes(x = sum)) +
geom_histogram(color = "black", fill = "purple", binwidth = 1000) +
ggtitle("Distribution of sample sequencing depth") +
xlab("Read counts") +
scale_x_continuous(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) +
theme(axis.title.y = element_blank())
# OTU analysis
# Create a new file called "otu_merged_musk_Physeq.RData" that has the phyloseq object
#save(list=c("otu_merged_musk"), file=paste0("Phenoflow/otu_merged_musk_Physeq.RData")) # Will be run on Flux
otu_phenoflow_alpha <- get(load("Phenoflow/otu_phenoflow/Phenoflow_diversity.RData"))
# Add the rownames so that it can be combined with the metadata
otu_phenoflow_alpha <- data.frame(otu_phenoflow_alpha, norep_filter_name = rownames(otu_phenoflow_alpha))
# Create the metadata information
meta_dat <- data.frame(sample_data(otu_merged_musk))
# Merge phenoflow data and
otu_phenoflow_alpha <- merge(otu_phenoflow_alpha, meta_dat, by = "norep_filter_name") %>%
select(-sd.D0.bre, -D0.bre)
### PREPARE DATA FRAMES FOR PHENOFLOW ALPHA DIVERSITY ANALYSIS
free_otu_alpha <- filter(otu_phenoflow_alpha, fraction == "Free" & norep_filter_name != "MOTHJ715" & year == "2015")
part_otu_alpha <- filter(otu_phenoflow_alpha, fraction == "WholePart" & norep_filter_name != "MOTHJ715" & year == "2015")
# Can fractional production be predicted by phenoflow D0 observed richness?
# FREE LIVING
free_otu_D0_stats <- lm(frac_bacprod ~ D0, data = free_otu_alpha)
free_otu_D0_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_otu_alpha)
##
## Coefficients:
## (Intercept) D0
## 11.22987 0.01446
summary(free_otu_D0_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.637 -10.917 -4.362 7.947 34.925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.22987 12.55657 0.894 0.392
## D0 0.01446 0.01297 1.115 0.291
##
## Residual standard error: 17.34 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.1106, Adjusted R-squared: 0.02164
## F-statistic: 1.243 on 1 and 10 DF, p-value: 0.2909
# PARTICLE
part_otu_D0_stats <- lm(frac_bacprod ~ D0, data = part_otu_alpha)
part_otu_D0_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_otu_alpha)
##
## Coefficients:
## (Intercept) D0
## -5.75443 0.01852
summary(part_otu_D0_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.558 -2.111 -1.010 4.205 7.624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.754434 3.816457 -1.508 0.16253
## D0 0.018516 0.004149 4.463 0.00121 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.007 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.6658, Adjusted R-squared: 0.6323
## F-statistic: 19.92 on 1 and 10 DF, p-value: 0.00121
# Plot D0 Observed Richness
otu_pheno_D0 <- ggplot(otu_phenoflow_alpha, aes(x=D0, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0 - sd.D0, xmax = D0 + sd.D0), width = 0.2) +
ggtitle("OTU: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,2000)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
#geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 1000, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_otu_D0_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_otu_D0_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 1600, y=8, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_otu_D0_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_otu_D0_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow chao1?
# FREE LIVING
free_otu_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = free_otu_alpha)
free_otu_D0chao_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_otu_alpha)
##
## Coefficients:
## (Intercept) D0.chao
## 10.294438 0.008147
summary(free_otu_D0chao_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.648 -12.410 -4.374 8.313 31.472
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.294438 10.885944 0.946 0.367
## D0.chao 0.008147 0.005765 1.413 0.188
##
## Residual standard error: 16.79 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.1665, Adjusted R-squared: 0.08312
## F-statistic: 1.997 on 1 and 10 DF, p-value: 0.188
# PARTICLE
part_otu_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = part_otu_alpha)
part_otu_D0chao_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_otu_alpha)
##
## Coefficients:
## (Intercept) D0.chao
## -5.9672 0.0106
summary(part_otu_D0chao_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6688 -2.7353 0.8518 2.4842 5.4200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.967203 3.069404 -1.944 0.080533 .
## D0.chao 0.010596 0.001869 5.671 0.000207 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.218 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.7628, Adjusted R-squared: 0.7391
## F-statistic: 32.16 on 1 and 10 DF, p-value: 0.0002065
# Plot chao1
otu_pheno_D0chao <- ggplot(otu_phenoflow_alpha, aes(x=D0.chao, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0.chao - sd.D0.chao, xmax = D0.chao + sd.D0.chao), width = 0.2) +
ggtitle("OTU: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(200,4000)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Chao1 (D0)") +
#geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 2000, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_otu_D0chao_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_otu_D0chao_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 2700, y=8, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_otu_D0chao_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_otu_D0chao_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D1 SHANNON ENTROPY?
# FREE LIVING
free_otu_D1_stats <- lm(frac_bacprod ~ D1, data = free_otu_alpha)
free_otu_D1_stats
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_otu_alpha)
##
## Coefficients:
## (Intercept) D1
## 11.1880 0.1906
summary(free_otu_D1_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.768 -10.512 -2.281 7.633 34.265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.1880 18.3486 0.610 0.556
## D1 0.1906 0.2605 0.732 0.481
##
## Residual standard error: 17.91 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.05082, Adjusted R-squared: -0.04409
## F-statistic: 0.5355 on 1 and 10 DF, p-value: 0.4811
# PARTICLE
part_otu_D1_stats <- lm(frac_bacprod ~ D1, data = part_otu_alpha)
part_otu_D1_stats
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_otu_alpha)
##
## Coefficients:
## (Intercept) D1
## 1.03603 0.06921
summary(part_otu_D1_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.068 -2.382 -1.078 3.249 10.867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.03603 2.81221 0.368 0.72025
## D1 0.06921 0.01792 3.862 0.00315 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.487 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.5986, Adjusted R-squared: 0.5585
## F-statistic: 14.91 on 1 and 10 DF, p-value: 0.003151
# Plot D1 SHANNON ENTROPY
otu_pheno_D1 <- ggplot(otu_phenoflow_alpha, aes(x=D1, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D1 - sd.D1, xmax = D1 + sd.D1), width = 0.2) +
ggtitle("OTU: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,400), expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
#geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.85,0.9), legend.title=element_blank()) +
annotate("text", x = 175, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_otu_D1_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_otu_D1_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 275, y=7, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_otu_D1_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_otu_D1_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D2 INVERSE SIMPSON?
# FREE LIVING
free_otu_D2_stats <- lm(frac_bacprod ~ D2, data = free_otu_alpha)
free_otu_D2_stats
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_otu_alpha)
##
## Coefficients:
## (Intercept) D2
## 11.4212 0.4847
summary(free_otu_D2_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.602 -10.034 -1.905 6.828 35.512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.4212 21.8934 0.522 0.613
## D2 0.4847 0.8148 0.595 0.565
##
## Residual standard error: 18.07 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.03418, Adjusted R-squared: -0.0624
## F-statistic: 0.3539 on 1 and 10 DF, p-value: 0.5651
# PARTICLE
part_otu_D2_stats <- lm(frac_bacprod ~ D2, data = part_otu_alpha)
part_otu_D2_stats
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_otu_alpha)
##
## Coefficients:
## (Intercept) D2
## 0.02044 0.26298
summary(part_otu_D2_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_otu_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2971 -2.1906 -0.2354 1.3639 7.6078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02044 2.33018 0.009 0.993173
## D2 0.26298 0.05084 5.173 0.000417 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.517 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.7279, Adjusted R-squared: 0.7007
## F-statistic: 26.76 on 1 and 10 DF, p-value: 0.0004174
# Plot D2 INVERSE SIMPSON
otu_pheno_D2 <- ggplot(otu_phenoflow_alpha, aes(x=D2, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D2 - sd.D2, xmax = D2 + sd.D2), width = 0.2) +
ggtitle("OTU: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,80), expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
#geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.85,0.9), legend.title=element_blank()) +
annotate("text", x = 40, y=45, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_otu_D2_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_otu_D2_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 50, y=25, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_otu_D2_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_otu_D2_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
otu_phenoflow <- plot_grid(otu_pheno_D0, otu_pheno_D0chao, otu_pheno_D1, otu_pheno_D2,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 141 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
otu_phenoflow
#ggsave("Figures/Phenoflow_otu.png", otu_phenoflow, dpi = 600, units = "in", width = 10, height = 8)
# OTU analysis
# Create a new file called "otu_merged_musk_Physeq.RData" that has the phyloseq object
#save(list=c("otu_merged_musk"), file=paste0("Phenoflow/otu_merged_musk_Physeq.RData")) # Will be run on Flux
#Phenoflow::Diversity_16S(otu_merged_musk, R=3)
# Oligotyping
#oligo_phenoflow <- Diversity_16S(oligo_merged_musk, R=100, brea = FALSE)
#write.table(dat, "PhenoFlow/oligo.phenoflow", sep = "\t")
# Do not overwrite the important dataframe!
oligo_phenoflow_alpha <- read.table("PhenoFlow/oligo.phenoflow", sep = "\t", header = TRUE)
# Add the rownames so that it can be combined with the metadata
oligo_phenoflow_alpha <- data.frame(oligo_phenoflow_alpha, norep_filter_name = rownames(oligo_phenoflow_alpha))
# Create the metadata information
meta_dat <- data.frame(sample_data(oligo_merged_musk))
# Merge phenoflow data and
oligo_phenoflow_alpha <- merge(oligo_phenoflow_alpha, meta_dat, by = "norep_filter_name") %>%
select(-sd.D0.bre, -D0.bre)
### PREPARE DATA FRAMES FOR PHENOFLOW ALPHA DIVERSITY ANALYSIS
free_oligo_alpha <- filter(oligo_phenoflow_alpha, fraction == "Free" & norep_filter_name != "MOTHJ715" & year == "2015")
part_oligo_alpha <- filter(oligo_phenoflow_alpha, fraction == "WholePart" & norep_filter_name != "MOTHJ715" & year == "2015")
# Can fractional production be predicted by phenoflow D0 observed richness?
# FREE LIVING
free_oligo_D0_stats <- lm(frac_bacprod ~ D0, data = free_oligo_alpha)
free_oligo_D0_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_oligo_alpha)
##
## Coefficients:
## (Intercept) D0
## 13.29673 0.03747
summary(free_oligo_D0_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.127 -13.686 -1.857 6.963 39.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.29673 63.77924 0.208 0.839
## D0 0.03747 0.22105 0.170 0.869
##
## Residual standard error: 18.36 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.002865, Adjusted R-squared: -0.09685
## F-statistic: 0.02873 on 1 and 10 DF, p-value: 0.8688
# PARTICLE
part_oligo_D0_stats <- lm(frac_bacprod ~ D0, data = part_oligo_alpha)
part_oligo_D0_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_oligo_alpha)
##
## Coefficients:
## (Intercept) D0
## -35.8327 0.1747
summary(part_oligo_D0_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2762 -4.1880 -0.8822 5.8425 11.2597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -35.83267 19.11629 -1.874 0.0903 .
## D0 0.17471 0.07246 2.411 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.887 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3676, Adjusted R-squared: 0.3044
## F-statistic: 5.814 on 1 and 10 DF, p-value: 0.03661
# Plot D0 Observed Richness
oligo_pheno_D0 <- ggplot(oligo_phenoflow_alpha, aes(x=D0, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0 - sd.D0, xmax = D0 + sd.D0), width = 0.2) +
ggtitle("Oligotyping: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(200,400)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
#geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 370, y=25, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_oligo_D0_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_oligo_D0_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 370, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_oligo_D0_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_oligo_D0_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow chao1?
# FREE LIVING
free_oligo_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = free_oligo_alpha)
free_oligo_D0chao_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_oligo_alpha)
##
## Coefficients:
## (Intercept) D0.chao
## -5.84212 0.09034
summary(free_oligo_D0chao_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.816 -13.372 -1.008 7.773 38.028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.84212 72.99061 -0.080 0.938
## D0.chao 0.09034 0.21987 0.411 0.690
##
## Residual standard error: 18.23 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.0166, Adjusted R-squared: -0.08174
## F-statistic: 0.1688 on 1 and 10 DF, p-value: 0.6898
# PARTICLE
part_oligo_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = part_oligo_alpha)
part_oligo_D0chao_stats
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_oligo_alpha)
##
## Coefficients:
## (Intercept) D0.chao
## -27.1424 0.1179
summary(part_oligo_D0chao_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.494 -5.694 -1.356 4.844 16.549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -27.14237 25.92181 -1.047 0.320
## D0.chao 0.11786 0.08191 1.439 0.181
##
## Residual standard error: 7.883 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1715, Adjusted R-squared: 0.08866
## F-statistic: 2.07 on 1 and 10 DF, p-value: 0.1808
# Plot chao1
oligo_pheno_D0chao <- ggplot(oligo_phenoflow_alpha, aes(x=D0.chao, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0.chao - sd.D0.chao, xmax = D0.chao + sd.D0.chao), width = 0.2) +
ggtitle("Oligotyping: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(200,400)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Chao1 (D0)") +
#geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') +
#geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 220, y=25, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_oligo_D0chao_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_oligo_D0chao_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 220, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_oligo_D0chao_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_oligo_D0chao_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D1 SHANNON ENTROPY?
# FREE LIVING
free_oligo_D1_stats <- lm(frac_bacprod ~ D1, data = free_oligo_alpha)
free_oligo_D1_stats
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_oligo_alpha)
##
## Coefficients:
## (Intercept) D1
## -29.372 0.758
summary(free_oligo_D1_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.285 -5.322 -1.406 5.520 26.706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29.3722 28.9643 -1.014 0.3344
## D1 0.7580 0.4057 1.869 0.0912 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.83 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.2588, Adjusted R-squared: 0.1847
## F-statistic: 3.491 on 1 and 10 DF, p-value: 0.09123
# PARTICLE
part_oligo_D1_stats <- lm(frac_bacprod ~ D1, data = part_oligo_alpha)
part_oligo_D1_stats
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_oligo_alpha)
##
## Coefficients:
## (Intercept) D1
## -5.1569 0.2134
summary(part_oligo_D1_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.341 -1.393 -0.722 1.517 8.461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.15685 3.86546 -1.334 0.21176
## D1 0.21342 0.05018 4.253 0.00168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.644, Adjusted R-squared: 0.6084
## F-statistic: 18.09 on 1 and 10 DF, p-value: 0.001681
# Plot D1 SHANNON ENTROPY
oligo_pheno_D1 <- ggplot(oligo_phenoflow_alpha, aes(x=D1, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D1 - sd.D1, xmax = D1 + sd.D1), width = 0.2) +
ggtitle("Oligotyping: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,150), expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 110, y=45, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_oligo_D1_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_oligo_D1_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 120, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_oligo_D1_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_oligo_D1_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D2 INVERSE SIMPSON?
# FREE LIVING
free_oligo_D2_stats <- lm(frac_bacprod ~ D2, data = free_oligo_alpha)
free_oligo_D2_stats
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_oligo_alpha)
##
## Coefficients:
## (Intercept) D2
## -7.4410 0.9395
summary(free_oligo_D2_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.900 -7.696 -1.478 3.820 30.038
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4410 15.5092 -0.480 0.6417
## D2 0.9395 0.4433 2.119 0.0601 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.28 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.3099, Adjusted R-squared: 0.2409
## F-statistic: 4.491 on 1 and 10 DF, p-value: 0.0601
# PARTICLE
part_oligo_D2_stats <- lm(frac_bacprod ~ D2, data = part_oligo_alpha)
part_oligo_D2_stats
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_oligo_alpha)
##
## Coefficients:
## (Intercept) D2
## -2.1723 0.3782
summary(part_oligo_D2_stats)
##
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_oligo_alpha)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7855 -2.4461 -0.7825 2.0559 7.7425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.17231 2.77374 -0.783 0.45168
## D2 0.37821 0.07549 5.010 0.00053 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.623 on 10 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.7151, Adjusted R-squared: 0.6866
## F-statistic: 25.1 on 1 and 10 DF, p-value: 0.0005296
# Plot D2 INVERSE SIMPSON
oligo_pheno_D2 <- ggplot(oligo_phenoflow_alpha, aes(x=D2, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D2 - sd.D2, xmax = D2 + sd.D2), width = 0.2) +
ggtitle("Oligotyping: Phenoflow") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,80), expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') +
geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
annotate("text", x = 65, y=45, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(free_oligo_D2_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(free_oligo_D2_stats)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 50, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(part_oligo_D2_stats)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(part_oligo_D2_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
oligotyping_phenoflow <- plot_grid(oligo_pheno_D0, oligo_pheno_D0chao, oligo_pheno_D1, oligo_pheno_D2,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 34 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 34 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
oligotyping_phenoflow
#ggsave("Figures/Phenoflow_oligotyping.png", oligotyping_phenoflow, dpi = 600, units = "in", width = 10, height = 8)
## OTU: SUBSAMPLE AT 6600
# OLIGOTYPING: SUBSAMPLE AT 4300
# Prune samples out that have too few reads
otu_merged_pruned <- prune_samples(sample_sums(otu_merged) > 6600, otu_merged)
otu_merged_pruned <- prune_taxa(taxa_sums(otu_merged_pruned) > 0, otu_merged_pruned)
min(sample_sums(otu_merged_pruned))
## [1] 6665
# Prune samples out that have too few reads
oligo_merged_pruned <- prune_samples(sample_sums(oligo_merged) > 4300, oligo_merged)
oligo_merged_pruned <- prune_taxa(taxa_sums(oligo_merged_pruned) > 0, oligo_merged_pruned)
min(sample_sums(oligo_merged_pruned))
## [1] 4332
# Scale the samples
scaled_otu_merged_pruned <- otu_merged_pruned %>%
scale_reads(round = "round")
scaled_oligo_merged_pruned <- oligo_merged_pruned %>%
scale_reads(round = "round")
# Initialize matrices to store richness and evenness estimates
# nsamp <- nsamples(oligo_merged_pruned)
# min_lib <- min(sample_sums(oligo_merged_pruned)) - 1
#
# oligo_richness <- matrix(nrow = nsamp, ncol = 100)
# row.names(oligo_richness) <- sample_names(oligo_merged_pruned)
#
# oligo_evenness <- matrix(nrow = nsamp, ncol = 100)
# row.names(oligo_evenness) <- sample_names(oligo_merged_pruned)
#
# oligo_shannon <- matrix(nrow = nsamp, ncol = 100)
# row.names(oligo_shannon) <- sample_names(oligo_merged_pruned)
#
# # It is always important to set a seed when you subsample so your result is replicable
# set.seed(777)
#
# for (i in 1:100) {
# # Subsample
# r <- rarefy_even_depth(oligo_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
#
# # Calculate richness
# rich <- as.numeric(as.matrix(estimate_richness(r, measures = "Observed")))
# oligo_richness[ ,i] <- rich
#
# # Calculate evenness
# even <- as.numeric(as.matrix(estimate_richness(r, measures = "InvSimpson")))
# oligo_evenness[ ,i] <- even
#
# # Calculate Shannon Entropy
# shannon <- as.numeric(as.matrix(estimate_richness(r, measures = "Shannon")))
# oligo_shannon[ ,i] <- shannon
#
# }
# write.table(oligo_evenness, "metadata/oligo_evenness100_rarefy4331", row.names = TRUE)
# write.table(oligo_richness, "metadata/oligo_richness100_rarefy4331", row.names = TRUE)
# write.table(oligo_shannon, "metadata/oligo_shannon100_rarefy4331", row.names = TRUE)
#
#
#
# # Initialize matrices to store richness and evenness estimates
# nsamp <- nsamples(otu_merged_pruned)
# min_lib <- min(sample_sums(otu_merged_pruned)) - 1
#
# otu_richness <- matrix(nrow = nsamp, ncol = 100)
# row.names(otu_richness) <- sample_names(otu_merged_pruned)
#
# otu_evenness <- matrix(nrow = nsamp, ncol = 100)
# row.names(otu_evenness) <- sample_names(otu_merged_pruned)
#
# otu_shannon <- matrix(nrow = nsamp, ncol = 100)
# row.names(otu_shannon) <- sample_names(otu_merged_pruned)
#
# # It is always important to set a seed when you subsample so your result is replicable
# set.seed(777)
#
# for (i in 1:100) {
# # Subsample
# r <- rarefy_even_depth(otu_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
#
# # Calculate richness
# rich <- as.numeric(as.matrix(estimate_richness(r, measures = "Observed")))
# otu_richness[ ,i] <- rich
#
# # Calculate evenness
# even <- as.numeric(as.matrix(estimate_richness(r, measures = "InvSimpson")))
# otu_evenness[ ,i] <- even
#
# # Calculate evenness
# shannon <- as.numeric(as.matrix(estimate_richness(r, measures = "Shannon")))
# otu_shannon[ ,i] <- shannon
#
# }
# write.table(otu_evenness, "metadata/otu_evenness100_rarefy6664", row.names = TRUE)
# write.table(otu_richness, "metadata/otu_richness100_rarefy6664", row.names = TRUE)
# write.table(otu_shannon, "metadata/otu_shannon100_rarefy6664", row.names = TRUE)
# Load values
nsamp <- nsamples(otu_merged_pruned)
min_lib <- min(sample_sums(otu_merged_pruned)) - 1
# Read in the files
otu_richness <- read.table("metadata/otu_richness100_rarefy6664", header = TRUE)
otu_evenness <- read.table("metadata/otu_evenness100_rarefy6664", header = TRUE)
otu_shannon <- read.table("metadata/otu_shannon100_rarefy6664", header = TRUE)
# Create a new dataframe to hold the means and standard deviations of richness estimates
norep_filter_name <- row.names(otu_richness)
mean <- apply(otu_richness, 1, mean)
sd <- apply(otu_richness, 1, sd)
measure <- rep("Richness", nsamp)
otu_rich_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Create a new dataframe to hold the means and standard deviations of evenness estimates
norep_filter_name <- row.names(otu_evenness)
mean <- apply(otu_evenness, 1, mean)
sd <- apply(otu_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
otu_even_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Create a new dataframe to hold the means and standard deviations of shannon entropy estimates
norep_filter_name <- row.names(otu_shannon)
mean <- apply(otu_shannon, 1, mean)
sd <- apply(otu_shannon, 1, sd)
measure <- rep("Shannon_Entropy", nsamp)
otu_shan_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Create a new dataframe to hold the means and standard deviations of simpsons evenness estimates
norep_filter_name <- row.names(otu_evenness)
mean <- apply(otu_evenness, 1, mean)
sd <- apply(otu_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
otu_simpseven_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Calculate Simpson's Evenness into new df called "simps_evenness"
otu_simps_evenness <- inner_join(otu_rich_stats, otu_even_stats, by = "norep_filter_name") %>%
mutate(mean = mean.y/mean.x,
sd = sd(mean),
measure = "Simpsons_Evenness") %>%
select(norep_filter_name, mean, sd, measure)
# Combine alpha diversity into one dataframe
otu_alpha <- rbind(otu_rich_stats, otu_even_stats, otu_simps_evenness, otu_shan_stats)
s <- data.frame(sample_data(otu_merged_pruned))
otu_alphadiv <- merge(otu_alpha, s, by = "norep_filter_name") %>%
filter(project == "Muskegon_Lake")
ggplot(filter(otu_alphadiv, measure == "Richness"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Richness") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Inverse Simpson") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
ggplot(filter(otu_alphadiv, measure == "Shannon_Entropy"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Shannon Entropy") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
###############################
ML_otu_rich_stats <- filter(otu_alphadiv, measure == "Richness" & project == "Muskegon_Lake" & fraction %in% c("WholePart", "Free") & year == "2015")
# Can fractional production be predicted by richness?
free_ML_otu_rich_stats <- filter(ML_otu_rich_stats, fraction == "Free")
freeprod_ML_otu_rich <- lm(frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
freeprod_ML_otu_rich
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
##
## Coefficients:
## (Intercept) mean
## 0.51402 0.05311
summary(freeprod_ML_otu_rich)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.937 -11.778 -1.399 10.299 27.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.51402 15.94204 0.032 0.975
## mean 0.05311 0.03430 1.548 0.153
##
## Residual standard error: 16.51 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.1934, Adjusted R-squared: 0.1127
## F-statistic: 2.398 on 1 and 10 DF, p-value: 0.1526
part_ML_abs_rich_stats <- filter(ML_otu_rich_stats, fraction == "WholePart")
partprod_MLotu_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
partprod_MLotu_rich
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
##
## Coefficients:
## (Intercept) mean
## -8.75434 0.02831
summary(partprod_MLotu_rich)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1168 -4.1580 -0.2669 3.6917 8.8609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.754341 4.704766 -1.861 0.0924 .
## mean 0.028315 0.006728 4.209 0.0018 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.203 on 10 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.6391, Adjusted R-squared: 0.6031
## F-statistic: 17.71 on 1 and 10 DF, p-value: 0.001804
# Plot
otu_rich_vegan <- ggplot(ML_otu_rich_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("OTU: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(expand = c(0,0), limits = c(0,1250)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
#geom_smooth(data=subset(ML_otu_rich_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_otu_rich_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 800, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_otu_rich)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_otu_rich)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 900, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLotu_rich)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLotu_rich)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### SHANNON ENTROPY
# Subset a dataframe with the key information
ML_otu_shannon_stats <- filter(otu_alphadiv,
measure == "Shannon_Entropy" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by simpsevenness?
free_ML_otu_shannon_stats <- filter(ML_otu_shannon_stats, fraction == "Free")
freeprod_ML_otu_shannon <- lm(frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
freeprod_ML_otu_shannon
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
##
## Coefficients:
## (Intercept) mean
## -23.40 11.54
summary(freeprod_ML_otu_shannon)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.745 -10.533 -1.786 7.452 35.270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23.40 74.45 -0.314 0.760
## mean 11.54 18.05 0.639 0.537
##
## Residual standard error: 18.02 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.03925, Adjusted R-squared: -0.05682
## F-statistic: 0.4086 on 1 and 10 DF, p-value: 0.5371
part_ML_abs_shannon_stats <- filter(ML_otu_shannon_stats, fraction == "WholePart")
partprod_MLotu_shannon <- lm(frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
partprod_MLotu_shannon
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
##
## Coefficients:
## (Intercept) mean
## -37.39 10.23
summary(partprod_MLotu_shannon)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0141 -3.3200 -0.0846 1.3747 11.7928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -37.388 12.998 -2.876 0.01649 *
## mean 10.226 2.782 3.675 0.00428 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.649 on 10 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.5746, Adjusted R-squared: 0.5321
## F-statistic: 13.51 on 1 and 10 DF, p-value: 0.004278
# Plot
otu_shannon_vegan <- ggplot(ML_otu_shannon_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("OTU: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(expand = c(0,0), limits=c(3, 6)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
#geom_smooth(data=subset(ML_otu_shannon_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_otu_shannon_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 3.5, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_otu_shannon)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_otu_shannon)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 5.35, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLotu_shannon)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLotu_shannon)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### INVERSE SIMPSON
# Subset a dataframe with the key information
ML_otu_invsimps_stats <- filter(otu_alphadiv,
measure == "Inverse_Simpson" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by invsimpsness?
free_ML_otu_invsimps_stats <- filter(ML_otu_invsimps_stats, fraction == "Free")
freeprod_ML_otu_invsimps <- lm(frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
freeprod_ML_otu_invsimps
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
##
## Coefficients:
## (Intercept) mean
## 11.2033 0.4942
summary(freeprod_ML_otu_invsimps)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.623 -9.985 -1.943 6.863 35.467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.2033 21.9431 0.511 0.621
## mean 0.4942 0.8187 0.604 0.560
##
## Residual standard error: 18.06 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.03516, Adjusted R-squared: -0.06132
## F-statistic: 0.3644 on 1 and 10 DF, p-value: 0.5595
part_ML_abs_invsimps_stats <- filter(ML_otu_invsimps_stats, fraction == "WholePart")
partprod_MLotu_invsimps <- lm(frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
partprod_MLotu_invsimps
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
##
## Coefficients:
## (Intercept) mean
## 0.01836 0.26372
summary(partprod_MLotu_invsimps)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4341 -2.1971 -0.2443 1.3800 7.6631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01836 2.35150 0.008 0.993925
## mean 0.26372 0.05148 5.122 0.000449 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.55 on 10 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.724, Adjusted R-squared: 0.6965
## F-statistic: 26.24 on 1 and 10 DF, p-value: 0.0004492
# Plot Simpson's Evenness
otu_invsimps_vegan <- ggplot(ML_otu_invsimps_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("OTU: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(limits = c(0,100), expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
#geom_smooth(data=subset(ML_otu_invsimps_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_otu_invsimps_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.85,0.9),
legend.title=element_blank()) +
annotate("text", x = 58, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_otu_invsimps)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_otu_invsimps)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 63, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLotu_invsimps)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLotu_invsimps)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### SIMPSON'S EVENNESS
# Subset a dataframe with the key information
ML_otu_simpseven_stats <- filter(otu_alphadiv,
measure == "Simpsons_Evenness" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by simpsevenness?
free_ML_otu_simpseven_stats <- filter(ML_otu_simpseven_stats, fraction == "Free")
freeprod_ML_otu_simpseven <- lm(frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
freeprod_ML_otu_simpseven
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
##
## Coefficients:
## (Intercept) mean
## 53.98 -494.38
summary(freeprod_ML_otu_simpseven)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.813 -12.187 -5.198 9.118 34.886
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.98 26.68 2.023 0.0706 .
## mean -494.38 433.17 -1.141 0.2803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.3 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.1152, Adjusted R-squared: 0.02677
## F-statistic: 1.303 on 1 and 10 DF, p-value: 0.2803
part_ML_abs_simpseven_stats <- filter(ML_otu_simpseven_stats, fraction == "WholePart")
partprod_MLotu_simpseven <- lm(frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
partprod_MLotu_simpseven
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
##
## Coefficients:
## (Intercept) mean
## -4.393 276.542
summary(partprod_MLotu_simpseven)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1679 -1.8119 -0.9444 0.7755 12.6815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.393 4.774 -0.920 0.37911
## mean 276.542 85.310 3.242 0.00885 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.048 on 10 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.5124, Adjusted R-squared: 0.4636
## F-statistic: 10.51 on 1 and 10 DF, p-value: 0.008845
# Plot
otu_simpseven_vegan <- ggplot(ML_otu_simpseven_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) +
ggtitle("OTU: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
scale_x_continuous(expand = c(0,0), limits=c(0, 0.09)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Simpson's Evenness") +
#geom_smooth(data=subset(ML_otu_simpseven_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_otu_simpseven_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 0.03, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_otu_simpseven)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_otu_simpseven)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 0.015, y=7, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLotu_simpseven)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLotu_simpseven)$coefficients[,4][2]), digits = 4)));
otu_vegan <- plot_grid(otu_rich_vegan, otu_simpseven_vegan, otu_shannon_vegan, otu_invsimps_vegan,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
otu_vegan
ggsave("Figures/vegan_otu_alpha_vs_prod.png", otu_vegan, dpi = 600, units = "in", width = 10, height = 8)
# Load values
nsamp <- nsamples(oligo_merged_pruned)
min_lib <- min(sample_sums(oligo_merged_pruned)) - 1
# Read in the files
oligo_richness <- read.table("metadata/oligo_richness100_rarefy4331", header = TRUE)
oligo_evenness <- read.table("metadata/oligo_evenness100_rarefy4331", header = TRUE)
oligo_shannon <- read.table("metadata/oligo_shannon100_rarefy4331", header = TRUE)
# Create a new dataframe to hold the means and standard deviations of richness estimates
norep_filter_name <- row.names(oligo_richness)
mean <- apply(oligo_richness, 1, mean)
sd <- apply(oligo_richness, 1, sd)
measure <- rep("Richness", nsamp)
oligo_rich_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Create a new dataframe to hold the means and standard deviations of evenness estimates
norep_filter_name <- row.names(oligo_evenness)
mean <- apply(oligo_evenness, 1, mean)
sd <- apply(oligo_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
oligo_even_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Create a new dataframe to hold the means and standard deviations of Shannon Entropy
norep_filter_name <- row.names(oligo_shannon)
mean <- apply(oligo_shannon, 1, mean)
sd <- apply(oligo_shannon, 1, sd)
measure <- rep("Shannon_Entropy", nsamp)
oligo_shannon_stats <- data.frame(norep_filter_name, mean, sd, measure)
# Calculate Simpson's Evenness into new df called "simps_evenness"
oligo_simps_evenness <- inner_join(oligo_rich_stats, oligo_even_stats, by = "norep_filter_name") %>%
mutate(mean = mean.y/mean.x,
sd = sd(mean),
measure = "Simpsons_Evenness") %>%
select(norep_filter_name, mean, sd, measure)
# Combine alpha diversity into one dataframe
oligo_alpha <- rbind(oligo_rich_stats, oligo_even_stats, oligo_simps_evenness, oligo_shannon_stats)
s <- data.frame(sample_data(oligo_merged_pruned))
oligo_alphadiv <- merge(oligo_alpha, s, by = "norep_filter_name") %>%
filter(project == "Muskegon_Lake")
ggplot(filter(oligo_alphadiv, measure == "Richness"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Richness") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
ggplot(filter(oligo_alphadiv, measure == "Inverse_Simpson"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Inverse Simpson") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
ggplot(filter(oligo_alphadiv, measure == "Shannon_Entropy"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
geom_point() + facet_grid(project~fraction, scales = "free") +
xlab("Sample Name") + ylab("Shannon Entropy") +
theme(axis.text.x = element_text(angle = 30)) #Set the x-axis labels)
######################################################### RICHNESS
# Subset a dataframe with the key information
ML_oligo_rich_stats <- filter(oligo_alphadiv,
measure == "Richness" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by richness?
free_ML_oligo_rich_stats <- filter(ML_oligo_rich_stats, fraction == "Free")
freeprod_ML_oligo_rich <- lm(frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
freeprod_ML_oligo_rich
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
##
## Coefficients:
## (Intercept) mean
## -30.1079 0.2411
summary(freeprod_ML_oligo_rich)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.101 -11.111 -0.630 8.417 36.302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -30.1079 70.8409 -0.425 0.680
## mean 0.2411 0.3144 0.767 0.461
##
## Residual standard error: 17.87 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.05554, Adjusted R-squared: -0.03891
## F-statistic: 0.588 on 1 and 10 DF, p-value: 0.4609
part_ML_abs_rich_stats <- filter(ML_oligo_rich_stats, fraction == "WholePart")
partprod_MLoligo_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
partprod_MLoligo_rich
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
##
## Coefficients:
## (Intercept) mean
## -33.0697 0.1797
summary(partprod_MLoligo_rich)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1210 -4.8977 0.0914 2.5357 12.6906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -33.06972 15.95877 -2.072 0.0650 .
## mean 0.17968 0.06609 2.719 0.0216 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.567 on 10 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.425, Adjusted R-squared: 0.3675
## F-statistic: 7.391 on 1 and 10 DF, p-value: 0.02161
# Plot
oligo_rich_vegan <- ggplot(ML_oligo_rich_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("Oligotyping: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
#scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
#geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 210, y=45, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_oligo_rich)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_oligo_rich)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 260, y=25, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLoligo_rich)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLoligo_rich)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### SHANNON ENTROPY
# Subset a dataframe with the key information
ML_oligo_shannon_stats <- filter(oligo_alphadiv,
measure == "Shannon_Entropy" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by richness?
free_ML_oligo_shannon_stats <- filter(ML_oligo_shannon_stats, fraction == "Free")
freeprod_ML_oligo_shannon <- lm(frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
freeprod_ML_oligo_shannon
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
##
## Coefficients:
## (Intercept) mean
## -166.70 45.29
summary(freeprod_ML_oligo_shannon)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.794 -6.324 -1.109 5.862 26.839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -166.70 115.57 -1.442 0.18
## mean 45.29 27.41 1.652 0.13
##
## Residual standard error: 16.3 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.2144, Adjusted R-squared: 0.1359
## F-statistic: 2.729 on 1 and 10 DF, p-value: 0.1295
part_ML_abs_shannon_stats <- filter(ML_oligo_shannon_stats, fraction == "WholePart")
partprod_MLoligo_shannon <- lm(frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
partprod_MLoligo_shannon
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
##
## Coefficients:
## (Intercept) mean
## -43.92 12.97
summary(partprod_MLoligo_shannon)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.9675 -2.5479 -0.8387 1.3585 11.6538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -43.915 16.918 -2.596 0.02669 *
## mean 12.970 4.047 3.205 0.00942 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.083 on 10 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.5067, Adjusted R-squared: 0.4573
## F-statistic: 10.27 on 1 and 10 DF, p-value: 0.009417
# Plot
oligo_shannon_vegan <- ggplot(ML_oligo_shannon_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("Oligotyping: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
#scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
#geom_smooth(data=subset(ML_oligo_shannon_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_oligo_shannon_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 3.9, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_oligo_shannon)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_oligo_shannon)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 4.7, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLoligo_shannon)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLoligo_shannon)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### INVERSE SIMPSON
# Subset a dataframe with the key information
ML_oligo_invsimps_stats <- filter(oligo_alphadiv,
measure == "Inverse_Simpson" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by invsimpsness?
free_ML_oligo_invsimps_stats <- filter(ML_oligo_invsimps_stats, fraction == "Free")
freeprod_ML_oligo_invsimps <- lm(frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
freeprod_ML_oligo_invsimps
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
##
## Coefficients:
## (Intercept) mean
## -7.4198 0.9455
summary(freeprod_ML_oligo_invsimps)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.025 -7.628 -1.606 3.779 30.021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4198 15.6884 -0.473 0.6464
## mean 0.9455 0.4519 2.092 0.0629 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.34 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.3045, Adjusted R-squared: 0.2349
## F-statistic: 4.377 on 1 and 10 DF, p-value: 0.06289
part_ML_abs_invsimps_stats <- filter(ML_oligo_invsimps_stats, fraction == "WholePart")
partprod_MLoligo_invsimps <- lm(frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
partprod_MLoligo_invsimps
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
##
## Coefficients:
## (Intercept) mean
## -2.1739 0.3796
summary(partprod_MLoligo_invsimps)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8628 -2.5078 -0.7671 2.1398 7.7541
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.17388 2.80930 -0.774 0.456943
## mean 0.37965 0.07681 4.942 0.000585 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.668 on 10 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.7095, Adjusted R-squared: 0.6805
## F-statistic: 24.43 on 1 and 10 DF, p-value: 0.000585
# Plot Simpson's Evenness
oligo_invsimps_vegan <- ggplot(ML_oligo_invsimps_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) +
ggtitle("Oligotyping: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
#scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson Index") +
geom_smooth(data=subset(ML_oligo_invsimps_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_oligo_invsimps_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 58, y=35, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_oligo_invsimps)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_oligo_invsimps)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 63, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLoligo_invsimps)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLoligo_invsimps)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### SIMPSON'S EVENNESS
# Subset a dataframe with the key information
ML_oligo_simpseven_stats <- filter(oligo_alphadiv,
measure == "Simpsons_Evenness" &
project == "Muskegon_Lake" &
fraction %in% c("WholePart", "Free") &
year == "2015")
# Can fractional production be predicted by simpsevenness?
free_ML_oligo_simpseven_stats <- filter(ML_oligo_simpseven_stats, fraction == "Free")
freeprod_ML_oligo_simpseven <- lm(frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
freeprod_ML_oligo_simpseven
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
##
## Coefficients:
## (Intercept) mean
## -1.932 174.827
summary(freeprod_ML_oligo_simpseven)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.188 -9.173 -3.443 5.543 28.901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.932 16.112 -0.120 0.907
## mean 174.827 103.655 1.687 0.123
##
## Residual standard error: 16.22 on 10 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.2215, Adjusted R-squared: 0.1436
## F-statistic: 2.845 on 1 and 10 DF, p-value: 0.1226
part_ML_abs_simpseven_stats <- filter(ML_oligo_simpseven_stats, fraction == "WholePart")
partprod_MLoligo_simpseven <- lm(frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
partprod_MLoligo_simpseven
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
##
## Coefficients:
## (Intercept) mean
## -4.501 112.772
summary(partprod_MLoligo_simpseven)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8889 -3.4521 -0.8976 2.5784 7.5087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.501 3.517 -1.280 0.22945
## mean 112.772 24.956 4.519 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.966 on 10 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.6713, Adjusted R-squared: 0.6384
## F-statistic: 20.42 on 1 and 10 DF, p-value: 0.00111
# Plot Simpson's Evenness
oligo_simpseven_vegan <- ggplot(ML_oligo_simpseven_stats, aes(x=mean, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) +
ggtitle("Oligotyping: Vegan") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
#scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Simpson's Evenness") +
#geom_smooth(data=subset(ML_oligo_simpseven_stats, fraction == "Free"), method='lm') +
geom_smooth(data=subset(ML_oligo_simpseven_stats, fraction == "WholePart"), method='lm') +
theme(legend.position=c(0.15,0.9),
legend.title=element_blank()) +
annotate("text", x = 0.105, y=25, color = "cornflowerblue", fontface = "bold",
label = paste("R2 =", round(summary(freeprod_ML_oligo_simpseven)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(freeprod_ML_oligo_simpseven)$coefficients[,4][2]), digits = 4))) +
annotate("text", x = 0.20, y=5, color = "firebrick3", fontface = "bold",
label = paste("R2 =", round(summary(partprod_MLoligo_simpseven)$adj.r.squared, digits = 4), "\n",
"p =", round(unname(summary(partprod_MLoligo_simpseven)$coefficients[,4][2]), digits = 4)));
oligotyping_vegan <- plot_grid(oligo_rich_vegan, oligo_simpseven_vegan, oligo_shannon_vegan, oligo_invsimps_vegan,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
oligotyping_vegan
#ggsave("Figures/vegan_oligo_alpha_vs_prod.png", oligotyping_vegan, dpi = 600, units = "in", width = 10, height = 8)
musk_scaled_otu_merged_pruned <- subset_samples(scaled_otu_merged_pruned, project == "Muskegon_Lake" &
year == "2015" &
limnion == "Top" &
fraction %in% c("WholePart", "Free"))
# Fix month levels in sample_data
sample_data(musk_scaled_otu_merged_pruned)$fraction <- factor(
sample_data(musk_scaled_otu_merged_pruned)$fraction,
levels = c("WholePart", "Free")
)
sample_data(musk_scaled_otu_merged_pruned)$lakesite <- factor(
sample_data(musk_scaled_otu_merged_pruned)$lakesite,
levels = c("MOT", "MDP", "MBR", "MIN")
)
musk_scaled_otu_merged_pruned_phylum <- musk_scaled_otu_merged_pruned %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
filter(Abundance > 0.02) %>% # Filter out low abundance taxa
arrange(Phylum) # Sort data frame alphabetically by phylum
# Set colors for plotting
phylum_colors <- c(
"#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "plum1",
"firebrick4", "white", "#D14285","seagreen3", "gold",
"#8569D5", "firebrick1", "peachpuff"
)
# Plot
otu_stack_bar <- ggplot(musk_scaled_otu_merged_pruned_phylum, aes(x = season, y = Abundance, fill = Phylum)) +
facet_grid(fraction~lakesite) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = phylum_colors) +
guides(fill = guide_legend(reverse = FALSE, keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance (Phyla > 2%) \n") +
ggtitle("Phylum Composition of 2015 Muskegon Lake \n Bacterial Communities by Sampling Site") +
theme(strip.background = element_rect(fill = "white"),
axis.text.x = element_text(colour = "black", angle=45, hjust = 1, vjust = 1))
#ggsave("Figures/otu_phylum_stackedbar.png", otu_stack_bar, dpi = 600, units = "in", width = 12, height = 8)
musk_scaled_oligo_merged_pruned <- subset_samples(scaled_oligo_merged_pruned, project == "Muskegon_Lake" &
year == "2015" &
limnion == "Top" &
fraction %in% c("WholePart", "Free"))
# Fix month levels in sample_data
sample_data(musk_scaled_oligo_merged_pruned)$fraction <- factor(
sample_data(musk_scaled_oligo_merged_pruned)$fraction,
levels = c("WholePart", "Free")
)
sample_data(musk_scaled_oligo_merged_pruned)$lakesite <- factor(
sample_data(musk_scaled_oligo_merged_pruned)$lakesite,
levels = c("MOT", "MDP", "MBR", "MIN")
)
musk_scaled_oligo_merged_pruned_phylum <- musk_scaled_oligo_merged_pruned %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
filter(Abundance > 0.02) %>% # Filter out low abundance taxa
arrange(Phylum) # Sort data frame alphabetically by phylum
# Set colors for plotting
phylum_colors <- c(
"#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "plum1",
"firebrick4", "white", "#D14285","seagreen3", "gold",
"#8569D5", "firebrick1", "peachpuff"
)
# Plot
ggplot(musk_scaled_oligo_merged_pruned_phylum, aes(x = season, y = Abundance, fill = Phylum)) +
facet_grid(fraction~lakesite) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = phylum_colors) +
guides(fill = guide_legend(reverse = FALSE, keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance (Phyla > 2%) \n") +
ggtitle("Oligotyping Phylum Composition of 2015 Muskegon Lake \n Bacterial Communities by Sampling Site") +
theme(strip.background = element_rect(fill = "white"),
axis.text.x = element_text(colour = "black", angle=45, hjust = 1, vjust = 1))
# ######################################################### RICHNESS
# # Subset a dataframe with the key information
# ML_oligo_rich_stats <- filter(oligo_alphadiv,
# measure == "Richness" &
# project == "Muskegon_Lake" &
# fraction %in% c("WholePart", "Free") &
# year == "2015")
#
#
# # Can fractional production be predicted by richness?
# free_ML_oligo_rich_stats <- filter(ML_oligo_rich_stats, fraction == "Free")
# freeprod_ML_oligo_rich <- lm(frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
# freeprod_ML_oligo_rich
# summary(freeprod_ML_oligo_rich)
#
# part_ML_abs_rich_stats <- filter(ML_oligo_rich_stats, fraction == "WholePart")
# partprod_MLoligo_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
# partprod_MLoligo_rich
# summary(partprod_MLoligo_rich)
# Plot
ggplot(filter(alpha_comb_final, Taxa == "Cyanobacteria"),
aes(x=Estimate, y=frac_bacprod, color = fraction)) +
geom_point(size = 3.5) +
ggtitle("OTU: Cyanobacteria") +
scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
facet_wrap( ~ Alphadiv, ncol=2, scales="free_x") +
#scale_x_continuous(expand = c(0,0)) +
#scale_y_continuous(limits = c(0,70),expand = c(0,0)) +
ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
#geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "Free"), method='lm') +
#geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "WholePart"), method='lm') +
theme(legend.position="right",
legend.title=element_blank())
## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'alpha_comb_final' not found
# annotate("text", x = 210, y=45, color = "cornflowerblue", fontface = "bold",
# label = paste("R2 =", round(summary(freeprod_ML_oligo_rich)$adj.r.squared, digits = 4), "\n",
# "p =", round(unname(summary(freeprod_ML_oligo_rich)$coefficients[,4][2]), digits = 4))) +
# annotate("text", x = 260, y=25, color = "firebrick3", fontface = "bold",
# label = paste("R2 =", round(summary(partprod_MLoligo_rich)$adj.r.squared, digits = 4), "\n",
# "p =", round(unname(summary(partprod_MLoligo_rich)$coefficients[,4][2]), digits = 4))); oligo_rich_vegan
prod_data <- data.frame(sample_data(musk_scaled_otu_merged_pruned))
ggplot(prod_data, aes(x = season, y = tot_bacprod, color = lakesite, group = lakesite, linetype = lakesite)) +
geom_point(size = 3.5) + geom_line(size = 2) +
ylab("Total Bacterial\n Production") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1))
#### Plot many environ variables
tot_prod_plot <- ggplot(prod_data, aes(x = season, y = tot_bacprod, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("Total Bacterial\n Production") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
chla_plot <- ggplot(prod_data, aes(x = season, y = Chl_Lab_ugperL, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("Chlorophyll a") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
ph_plot <- ggplot(prod_data, aes(x = season, y = pH, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("pH") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
tp_plot <- ggplot(prod_data, aes(x = season, y = TP_ugperL, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("TP_ugperL") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
no3_plot <- ggplot(prod_data, aes(x = season, y = NO3_mgperL, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("NO3_mgperL") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
nh3_plot <- ggplot(prod_data, aes(x = season, y = NH3_mgperL, color = lakesite, group = lakesite)) +
facet_grid(.~lakesite) +
geom_point(size = 3.5) + geom_line() +
ylab("NH3_mgperL") +
theme(strip.background = element_rect(fill = "white"),
axis.title.x = element_blank(),
axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
legend.position="none")
environ_seasons <- plot_grid(tot_prod_plot, chla_plot, ph_plot, tp_plot, no3_plot, nh3_plot,
labels = c("A", "B", "C", "D", "E", "F"),
align = "h", nrow = 6, ncol = 1)
environ_seasons
#ggsave("Figures/environ_seasons.png", environ_seasons, dpi = 600, units = "in", width = 12, height = 12)
# Michelle Berry's subsetting function, similar to phyloseq::subset_taxa, except taxa can
# be passed as arguments within functions without weird environment errors
#
# Args:
# physeq: a phyloseq object
# taxrank: taxonomic rank to filter on
# taxa: a vector of taxa groups to filter on
#
# Returns:
# a phyloseq object subsetted to the x taxa in taxrank
my_subset_taxa <- function(physeq, taxrank, taxa) {
physeq_tax_sub <- tax_table(physeq)[tax_table(physeq)[ , taxrank] %in% taxa, ]
tax_table(physeq) <- physeq_tax_sub
return(physeq)
}
# Initialize parameters
trials <- 100
min_lib <- min(sample_sums(musk_scaled_otu_merged_pruned)) # Depth we are rarefying to
# Groups to estimate alpha diversity for
mytaxa <- c("Acidobacteria", "Actinobacteria", "Alphaproteobacteria", "Armatimonadetes",
"Bacteroidetes", "Betaproteobacteria", "Chlorobi", "Chloroflexi",
"Cyanobacteria","Deltaproteobacteria", "Gammaproteobacteria",
"Planctomycetes","Verrucomicrobia")
names(mytaxa) <- mytaxa
# Taxonomic ranks of mytaxa
mytaxa_taxrank <- c("Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum",
"Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum")
names(mytaxa_taxrank) <- mytaxa
# Data frame to hold alpha diversity estimates over trials
alphadiv_df <- data.frame(matrix(nrow = nsamples(musk_scaled_otu_merged_pruned), ncol = trials))
# Initialize empty df's for richness and evenness of all taxa in mytaxa
richness <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
shannon <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
invsimps <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
simpseven <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
alphadiv_list <- list(richness = richness, shannon = shannon, invsimps = invsimps, simpseven = simpseven)
# Set the seed for reproducibility
set.seed(3)
# Run trials to subsample and estimate diversity
for (i in 1:100) {
# Subsample
rarefied_physeq <- rarefy_even_depth(musk_scaled_otu_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
# Generate alpha-diversity estimates for each taxonomic group
for (t in mytaxa) {
# Subset the taxa
physeq_sub <- my_subset_taxa(physeq = rarefied_physeq,
taxrank = mytaxa_taxrank[t],
taxa = t)
# Calculate observed richness for that group and store value in a df column
richness <- estimate_richness(physeq_sub, measures = "Observed")[ ,1]
alphadiv_list$richness[[t]][ ,i] <- richness
# Calculate observed shannon for that group and store value in a df column
shannon <- estimate_richness(physeq_sub, measures = "Shannon")[ ,1]
alphadiv_list$shannon[[t]][ ,i] <- shannon
# Calculate observed invsimps for that group and store value in a df column
invsimps <- estimate_richness(physeq_sub, measures = "InvSimpson")[ ,1]
alphadiv_list$invsimps[[t]][ ,i] <- invsimps
# Calculate Simpson's Evenness for that group and store value in a df column
alphadiv_list$simpseven[[t]][ ,i] <- (estimate_richness(physeq_sub, measures = "InvSimpson")[ ,1]/richness)
}
}
# Calculate the means of richness and inverse simpson from the 100 trials
alphadiv_est <- lapply(alphadiv_list, function(div_measure) {
lapply(div_measure, function(taxa_group) {
alpha_mean <- rowMeans(taxa_group)
return(alpha_mean)
})
})
# Convert alphadiv_est richness and simpson's E lists into wide data frames
l <- lapply(alphadiv_est, function(x) {
# convert from list to data.frame
est_df <- plyr::ldply(.data = x, .fun = data.frame)
names(est_df) <- c("Taxa", "Diversity")
# Add in SampleID column and spread to wide format
r <- est_df %>%
mutate(norep_filter_name = rep(sample_names(musk_scaled_otu_merged_pruned), length(mytaxa)))
return(r)
})
# Merge sample metadata with these estimates
merge_dat <- data.frame(sample_data(musk_scaled_otu_merged_pruned)) %>%
select(norep_filter_name, Chl_Lab_ugperL, TP_ugperL, pH, frac_bacprod, NH3_mgperL, NO3_mgperL,
lakesite, fraction, season) #%>%
#mutate(logChla = log(Chla)) %>%
#mutate(logPhyco = log(Phycocyanin + 0.1)) %>%
#mutate(logTP = log(TP)) %>%
#mutate(logTurb = log(Turbidity))
# Create a df with a "Diversity" column that includes richness and inv. simpson,
# and log-chl a values from erie sample_data
alpha_comb1 <- l$richness %>%
left_join(y = l$shannon, by = c("Taxa", "norep_filter_name")) %>% # Join the richness and inv_simp df's
rename(Richness = Diversity.x, Shannon = Diversity.y)
alpha_comb2 <- l$invsimps %>%
left_join(y = l$simpseven, by = c("Taxa", "norep_filter_name")) %>% # Join the richness and inv_simp df's
rename(Inverse_Simpson = Diversity.x, Simpsons_Evenness = Diversity.y)
alpha_comb_final <- alpha_comb1 %>%
left_join(y = alpha_comb2, by = c("Taxa", "norep_filter_name")) %>%
left_join(merge_dat, by = "norep_filter_name") %>% # Join with merged nutrient data
gather(key = "Alphadiv", value = "Estimate", Richness, Shannon, Inverse_Simpson, Simpsons_Evenness)
flow_cy <- read.csv2("metadata/flow_cytometry.csv", header = TRUE) %>%
select(-X) %>%
filter(Lake == "Muskegon") %>%
# Rename columns so they are more representative of the data within them
rename(volume_uL = volume,
raw_counts = counts,
HNA_counts = HNA,
LNA_counts = LNA) %>%
mutate(cells_per_uL = (raw_counts*dilution)/volume_uL,
HNA_per_uL = (HNA_counts*dilution)/volume_uL,
HNA_percent = (HNA_counts)/raw_counts*100,
LNA_per_uL = (LNA_counts*dilution)/volume_uL,
LNA_percent = (LNA_counts)/raw_counts)
flow_cy$Site <- factor(flow_cy$Site, levels = c("MOT", "MDP", "MBR", "MIN"))
#substr(flow_cy$Sample_16S, 5)
### HNA PER uL
surface_HNA <- ggplot(filter(flow_cy, Depth == "Surface"),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = c(600,3700)) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("Surface samples only")
deep_HNA <- ggplot(filter(flow_cy, Depth == "Deep"),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = c(600,3700)) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("Deep samples only")
all_HNA <- ggplot(filter(flow_cy),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = c(600,3700)) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("All samples")
HNA_per_uL_plots <- plot_grid(surface_HNA,deep_HNA, all_HNA,
labels = c("A", "B", "C"),
align = "h", nrow = 3, ncol = 1)
HNA_per_uL_plots
# HNA: LNA RATIO
lim_HNA <- c(10,60)
surface_HNA_percent <- ggplot(filter(flow_cy, Depth == "Surface"),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_HNA) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("Surface samples only")
deep_HNA_percent <- ggplot(filter(flow_cy, Depth == "Deep"),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_HNA) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("Deep samples only")
all_HNA_percent <- ggplot(filter(flow_cy),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_HNA) +
geom_jitter(size = 3.5) + facet_grid(.~Year) + ggtitle("All samples")
HNA_percent_plots <- plot_grid(surface_HNA_percent,deep_HNA_percent, all_HNA_percent,
labels = c("A", "B", "C"),
align = "h", nrow = 3, ncol = 1)
all_flow_cy <- read.csv2("metadata/flow_cytometry.csv", header = TRUE) %>%
select(-X) %>%
# Rename columns so they are more representative of the data within them
rename(volume_uL = volume,
raw_counts = counts,
HNA_counts = HNA,
LNA_counts = LNA) %>%
mutate(cells_per_uL = (raw_counts*dilution)/volume_uL,
HNA_per_uL = (HNA_counts*dilution)/volume_uL,
HNA_percent = (HNA_counts)/raw_counts*100,
LNA_per_uL = (LNA_counts*dilution)/volume_uL,
LNA_percent = (LNA_counts)/raw_counts*100,
Nuc_acid_ratio = HNA_per_uL/LNA_per_uL)
all_flow_cy$Season[all_flow_cy$Season == "Winter"] = "Fall"
all_flow_cy$Month[all_flow_cy$Month == "Januari"] = "October"
all_flow_cy$Site <- factor(all_flow_cy$Site, levels = c("MM110","MM45","MM15","MOT", "MDP", "MBR", "MLB","MIN"))
### HNA PER uL
lim_all_HNA_per_uL <- c(min(all_flow_cy$HNA_per_uL) - 100,max(all_flow_cy$HNA_per_uL) + 100)
# Set up the main plot
all_Lakes_surface_HNA <- ggplot(filter(all_flow_cy, Depth == "Surface"),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black", show.legend = FALSE) +
scale_y_continuous(limits = lim_all_HNA_per_uL) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("Surface samples only")
all_Lakes_deep_HNA <- ggplot(filter(all_flow_cy, Depth == "Deep"),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black", show.legend = FALSE) +
scale_y_continuous(limits = lim_all_HNA_per_uL) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("Deep samples only")
all_Lakes_all_HNA <- ggplot(filter(all_flow_cy),
aes(x = Site, y = HNA_per_uL, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black", show.legend = FALSE) +
scale_y_continuous(limits = lim_all_HNA_per_uL) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("All samples")
all_Lakes_HNA_per_uL_plots <- plot_grid(all_Lakes_surface_HNA, all_Lakes_deep_HNA, all_Lakes_all_HNA,
labels = c("A", "B", "C"),
align = "h", nrow = 3, ncol = 1)
# HNA: LNA RATIO
lim_all_HNA <- c(min(all_flow_cy$HNA_percent) - 0.05,max(all_flow_cy$HNA_percent) + 0.05)
all_Lakes_surface_HNA_percent <- ggplot(filter(all_flow_cy, Depth == "Surface"),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_all_HNA) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("Surface samples only")
all_Lakes_deep_HNA_percent <- ggplot(filter(all_flow_cy, Depth == "Deep"),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_all_HNA) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("Deep samples only")
all_Lakes_all_HNA_percent <- ggplot(filter(all_flow_cy),
aes(x = Site, y = HNA_percent, fill = Site, color = Season)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_y_continuous(limits = lim_all_HNA) +
scale_fill_viridis(option = "viridis", discrete = TRUE) +
scale_color_brewer(palette="Set1") +
geom_jitter(size = 3.5) + facet_grid(.~Year, scales = "free_x") + ggtitle("All samples")
all_Lakes_HNA_percent_plots <- plot_grid(all_Lakes_surface_HNA_percent,all_Lakes_deep_HNA_percent, all_Lakes_all_HNA_percent,
labels = c("A", "B", "C"),
align = "h", nrow = 3, ncol = 1)
# DO EVERYTHING
ALLTHEPLOTS <- plot_grid(all_Lakes_HNA_per_uL_plots,all_Lakes_HNA_percent_plots,
labels = c("1", "2"),
align = "h", nrow = 1, ncol = 2)
ALLTHEPLOTS
# IS there a relationship between total counts and HNA?
a <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = HNA_per_uL, fill = Site, color = Site, shape = Season)) +
geom_point(size = 3.5) + ggtitle("HNA per uL") +
scale_color_viridis(option = "viridis", discrete = TRUE)
b <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = HNA_percent, fill = Site, color = Site, shape = Season)) +
geom_point(size = 3.5) + ggtitle("Percent HNA")+
scale_color_viridis(option = "viridis", discrete = TRUE)
c <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = LNA_per_uL, fill = Site, color = Site, shape = Season)) +
geom_point(size = 3.5) + ggtitle("LNA per uL") +
scale_color_viridis(option = "viridis", discrete = TRUE)
d <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = LNA_percent, fill = Site, color = Site, shape = Season)) +
geom_point(size = 3.5) + ggtitle("Percent LNA")+
scale_color_viridis(option = "viridis", discrete = TRUE)
e <- plot_grid(a,b,c, d,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
e
new <- gather(all_flow_cy, "Nucleic_Acid_Type", "NA_cells_per_uL", LNA_per_uL, HNA_per_uL) %>%
select(NA_cells_per_uL, Nucleic_Acid_Type, cells_per_uL, Month, Site, Season,Depth,Year)
p1 <- ggplot(new, aes(x = cells_per_uL, y = NA_cells_per_uL, color = Nucleic_Acid_Type)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("#675B78", "#EB7B88")) +
theme(legend.position = c(0.3, 0.85))
p2 <- ggplot(new, aes(x = cells_per_uL, y = NA_cells_per_uL, color = Season)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("#51BAAE", "#241B50", "#FD0F27")) +
theme(legend.position = c(0.15, 0.85))
# Ratio of HNA:LNA is not dependent on the number cells per uL!!!!
p3 <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = Nuc_acid_ratio, color = Season)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("#51BAAE", "#241B50", "#FD0F27")) +
geom_smooth(method = "lm") +
theme(legend.position = c(0.15, 0.85))
p4 <- ggplot(all_flow_cy, aes(x = cells_per_uL, y = Nuc_acid_ratio, color = Season, shape = as.factor(Year))) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("#51BAAE", "#241B50", "#FD0F27")) +
geom_smooth(method = "lm") +
theme(legend.position = c(0.85, 0.75))
ggplot(all_flow_cy, aes(x = cells_per_uL, y = Nuc_acid_ratio, color = as.factor(Year), shape = Season)) +
geom_point(size = 5, alpha = 0.8) +
scale_color_manual(values = c("#FD7400","#211426", "#BEDB39")) +
theme(legend.position = c(0.15, 0.85))
p5 <- plot_grid(p1, p2, p3, p4,
labels = c("A", "B", "C", "D"),
align = "h", nrow = 2, ncol = 2)
#ggsave("Figures/HNA_LNA.png", p5, dpi = 600, units = "in", width = 10, height = 8)